if (!require("dplyr")) {install.packages("dplyr"); require("dplyr")}
if (!require("Seurat")) {install.packages("Seurat"); require("Seurat")}
if (!require("patchwork")) {install.packages("patchwork"); require("patchwork")}
if (!require("knitr")) {install.packages("knitr"); require("knitr")}
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("BiocManager")) {install.packages("BiocManager"); require("BiocManager")}
if (!require("tibble")) {install.packages("tibble"); require("tibble")}
if (!require("ggpmisc")) {install.packages("ggpmisc"); require("ggpmisc")}
if (!require("RColorBrewer")) {install.packages("RColorBrewer"); require("RColorBrewer")} #color
if (!require("ggrepel")) {install.packages("ggrepel"); require("ggrepel")}
if (!require("DESeq2")) {BiocManager::install('DESeq2'); require("DESeq2")}
if (!require("here")) {install.packages("here"); require("here")}
if (!require("stringr")) {install.packages("stringr"); require("stringr")}
if (!require("car")) {install.packages("car"); require("car")}
if (!require("openxlsx")) {install.packages("openxlsx"); require("openxlsx")}
if (!require("readxl")) {install.packages("readxl"); require("readxl")}
if (!require("data.table")) {install.packages("data.table"); require("data.table")}
if (!require("ggvenn")) {install.packages("ggvenn"); require("ggvenn")}
if (!require("kableExtra")) {install.packages("kableExtra"); require("kableExtra")} # for color brewer
if (!require("gplots")) {install.packages("gplots"); require("gplots")} # for color brewer
if (!require("clusterProfiler")) {BiocManager::install('clusterProfiler'); require("clusterProfiler")}
if (!require("enrichplot")) {BiocManager::install('enrichplot'); require("enrichplot")}
if (!require("tidyverse")) {install.packages("tidyverse"); require("tidyverse")} # for data frame transformation
if (!require("plotly")) {install.packages("plotly"); require("plotly")}
library("EnhancedVolcano")
# Install from scratch
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("RSQLite") # Core dependency
BiocManager::install("org.Mm.eg.db")
BiocManager::install("clusterProfiler")
library(org.Mm.eg.db)
library(clusterProfiler)
library(AnnotationDbi)
library(dplyr)
library(magrittr)
library("EnhancedVolcano")
library(UpSetR)options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320## [1] 79456894976
UMAP plot summarizing the final clustering analysis of the dataset. Clusters were identified using the top 15 principal components and resolution 1 following SCTransform normalization. The UMAP visualization displays the separation of distinct cell populations.
seu <- SCTransform(seu) %>%
RunPCA() %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 1) %>%
RunUMAP(dims = 1:15)## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11384
## Number of edges: 353418
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7665
## Number of communities: 17
## Elapsed time: 1 seconds
Every cluster here represents the macula densa
Markers are from a
known list used to identify kidney cell types
DotPlot(seu,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()Cluster markers were identified for each group to characterize
cellular diversity within the dataset.The accompanying heatmap
highlights the top three genes per cluster, ranked by average log2 fold
change (avg_log2FC) and filtered for statistical
significance (p_val <0.05). This analysis revealed
expression patterns, such as distinct Egf and Umod expression in select
clusters and very mild differential expression among the first five
clusters, supporting the classification of macula densa subtypes.
Additionally, selected genes—including Ptger3, Pappa2, Egf,
Foxq1, Fos, Egr1, and Cxcl10—were visualized to further
illustrate key differences and marker distributions across
clusters.
all_markers <- FindAllMarkers(seu, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5)
all_markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 0.5, p_val_adj < 0.05) %>%
slice_head(n = 3) %>%
ungroup() -> top3
DoHeatmap(seu, features = top3$gene) + NoLegend()The seurat cluster order was changed to visually show similarities
between clusters on a multidimensional dotplot
The Dotplot shows 4 patterns in macula densa. There were clusters
highly expressing Pappa2,Egf,Fos,and Cxcl10. These became the hallmark
markers for identifying my macula densa types. Within each type
identification besides Cxcl10, Theres unique expressions of a few
specific genes.
Type 1 Clusters are a bit difficult but some markers that could help identify are Ptger3 and Pappa2.
Type 2 Expresses high Egf, with its subcluster expressing high Foxq1
(there are more but this is an example)
Type 3 Expresses high in Fos, it’s subcluster shows high expression
of Egr1.
seu@meta.data$seurat_clusters <-
factor(
seu@meta.data$seurat_clusters,
levels = c(0, 3, 1, 2, 4, 7, 12, 13,5,9, 6, 16, 10, 8, 11, 14, 15)
)
Idents(seu) <- "seurat_clusters"
DotPlot(seu,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()seu@meta.data <- seu@meta.data %>%
mutate(
md_subtype = dplyr::case_when(
seurat_clusters %in% c(0,3) ~ "type_1a",
seurat_clusters %in% c(7) ~ "type_1b",
seurat_clusters %in% c(1,2,4,12,13,5) ~ "type_1c",
seurat_clusters %in% c(9,6,16,10) ~ "type_2a",
seurat_clusters %in% c(8) ~ "type_2b",
seurat_clusters %in% c(11) ~ "type_3a",
seurat_clusters %in% c(14) ~ "type_3b",
seurat_clusters == 15 ~ "type_4"
)
)
DimPlot(object = seu, reduction = "umap", group.by = "md_subtype", label = TRUE)UMAP visualization showing the macula densa cell populations grouped
into four major types, based on re-annotation using aggregated
md_subtype classifications. Cells originally overclustered into subtypes
(e.g., type_1a, type_1b, type_1c) were merged into broader macula densa
types (type_1 through type_4) according to shared DEG markers.
seu@meta.data <- seu@meta.data %>%
mutate(
md_type = case_when(
md_subtype %in% c("type_1a", "type_1b", "type_1c") ~ "type_1",
md_subtype %in% c("type_2a", "type_2b") ~ "type_2",
md_subtype %in% c("type_3a", "type_3b") ~ "type_3",
md_subtype == "type_4" ~ "type_4",
)
)
DimPlot(object = seu, reduction = "umap", group.by = "md_type", label = TRUE)This shows our four distinct clusters
Diagonal Dotplot of Macula Densa Subtype Markers (more specific)
seu@meta.data$md_subtype <-
factor(seu@meta.data$md_subtype,
levels = c("type_1a","type_1b","type_1c","type_2a","type_2b", "type_3a", "type_3b", "type_4"))
Idents(seu) <- "md_subtype"
DotPlot(seu,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()Diagonal Dotplot of Macula Densa Type Markers
Idents(seu) <- "md_type"
markers.to.plot4 <- c("Pappa2","Robo2","Egf","Umod","Jun","Fos","Cxcl10","Isg15")
DotPlot(seu,
features = markers.to.plot4,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()Google Drive Link (Contains Excel Files)
Some clusters reveal rare but biologically informative distinctions,
which can highlight subtle cell states or activation.
For example,
S100g is a marker highly expressed specifically in cluster
12, but it exhibits strong sample-specific variation, notably
being much higher in sample SO4 compared to other
samples.
This pattern suggests that while S100g is a useful marker
for identifying this cluster, its expression may be influenced by batch
effects or sample-specific conditions and should be interpreted with
caution.
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
all_markers <- all_markers %>%
filter(p_val_adj != 1) %>%
arrange(desc(avg_log2FC)) %>%
dplyr::select(gene, everything())
# Split by cluster (ident column)
marker_list <- split(all_markers, all_markers$cluster)
# Sort names alphanumerically
sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]
# Create a workbook
wb <- createWorkbook()
# Add each cluster as a new worksheet
for (cluster_name in sorted_cluster_names) {
addWorksheet(wb, sheetName = cluster_name)
writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])
}
date <- format(Sys.Date(), "%Y%m%d")
# Save workbook
saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_Cluster.xlsx")), overwrite = TRUE)Marker genes defined by subtypes tend to be more robust and
consistent, reliably distinguishing functional or transcriptional
subpopulations within the macula densa.
These subtype markers are less likely to be confounded by technical
artifacts and better represent genuine biological variation related to
cell states or functions.
These subtypes are consistent with changes in principal
components.
Idents(seu) <- "md_subtype"
subtype_markers <- FindAllMarkers(seu,
only.pos = TRUE,
logfc.threshold = 0.1,
min.pct = 0.1,
min.diff.pct = 0.05,
return.thresh = 0.05)
subtype_markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
slice_head(n = 3) %>%
ungroup() -> top3subtype
DoHeatmap(seu, features = top3subtype$gene) + NoLegend()# Removing insignificant genes
subtype_markers <- subtype_markers %>%
filter(p_val_adj != 1) %>%
arrange(desc(avg_log2FC)) %>%
dplyr::select(gene, everything())
# Split by cluster (ident column)
marker_list <- split(subtype_markers, subtype_markers$cluster)
# Sort names alphanumerically
sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]
# Create a workbook
wb <- createWorkbook()
# Add each cluster as a new worksheet
for (cluster_name in sorted_cluster_names) {
addWorksheet(wb, sheetName = cluster_name)
writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])
}
date <- format(Sys.Date(), "%Y%m%d")
# Save workbook
saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_Subtype.xlsx")), overwrite = TRUE)At the broader MD type level, certain markers allow clear
differentiation between major macula densa subtypes.
These general markers serve as reliable signatures to classify each
MD type, providing consistent identification of the main macula densa
categories in this dataset.
Idents(seu) <- "md_type"
type_markers <- FindAllMarkers(seu,
only.pos = TRUE,
logfc.threshold = 0.1,
min.pct = 0.1,
min.diff.pct = 0.05,
return.thresh = 0.05)
type_markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
slice_head(n = 3) %>%
ungroup() -> top3type
DoHeatmap(seu, features = top3type$gene) + NoLegend()type_markers <- type_markers %>%
filter(p_val_adj != 1) %>%
arrange(desc(avg_log2FC)) %>%
dplyr::select(gene, everything())
# Split by cluster (ident column)
marker_list <- split(type_markers, type_markers$cluster)
# Sort names alphanumerically
sorted_cluster_names <- names(marker_list)[order(as.numeric(names(marker_list)))]
# Create a workbook
wb <- createWorkbook()
# Add each cluster as a new worksheet
for (cluster_name in sorted_cluster_names) {
addWorksheet(wb, sheetName = cluster_name)
writeData(wb, sheet = cluster_name, marker_list[[cluster_name]])
}
date <- format(Sys.Date(), "%Y%m%d")
# Save workbook
saveWorkbook(wb, here("jk_code", paste0(date, "_", "_SO4_FindAllMarkers_By_MDtype.xlsx")), overwrite = TRUE)Link Includes the Marker DEGs as well as the Gene Ontology of each
Macula Densa Type and Subtype.
Google Drive Link (Contains Excel Files)
# Filter Before Independently before you Run this
# Creating a function to Repeat Easily
pathway_analysis <- function(type_markers, type_name)
{
# Arrange markers by log2 fold change
df2 <- type_markers %>% arrange(desc(avg_log2FC))
DEG_list <- df2
markers <- DEG_list %>% mutate(SYMBOL = gene)
ENTREZ_list <- bitr(
geneID = DEG_list$gene,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db
)
markers <- ENTREZ_list %>% inner_join(markers, by = "SYMBOL")
# Upregulated Pathway Code
pos_markers_up <- markers %>% dplyr::filter(avg_log2FC > 0) %>% arrange(desc(abs(avg_log2FC)))
pos_ranks_up <- pos_markers_up$ENTREZID[abs(pos_markers_up$avg_log2FC) > 0]
pos_go_up <- enrichGO(gene = pos_ranks_up, OrgDb = "org.Mm.eg.db", ont = "BP", readable = TRUE)
chart_up <- dotplot(pos_go_up) +
ggtitle(paste(type_name, "Functions")) +
theme_classic() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "left",
axis.text.y = element_text(hjust = 0, size = 10)
) +
scale_y_discrete(position = "right", labels = function(x) str_wrap(x, width = 25))
return(list(
chart_up = chart_up,
pos_go_up = pos_go_up
))
}# Creating List of Marker genes to do Pathway Analysis
type1_markers <- type_markers[type_markers$cluster == "type_1", ]
type2_markers <- type_markers[type_markers$cluster == "type_2", ]
type3_markers <- type_markers[type_markers$cluster == "type_3", ]
type4_markers <- type_markers[type_markers$cluster == "type_4", ]
# Creating a Pos Go and Chart using PA function
results_type1 <- pathway_analysis(subset(type1_markers, avg_log2FC > 0.4), "type 1")
results_type2 <- pathway_analysis(subset(type2_markers, avg_log2FC > 0.72), "type 2")
results_type3 <- pathway_analysis(subset(type3_markers, avg_log2FC > 0), "type 3")
results_type4 <- pathway_analysis(subset(type4_markers, avg_log2FC > 0), "type 4")## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:324] "19283" "380785" "239435" "23850" "210801" "320685" "72309" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...394 enriched terms found
## 'data.frame': 394 obs. of 12 variables:
## $ ID : chr "GO:0048705" "GO:0001822" "GO:0072001" "GO:0003002" ...
## $ Description : chr "skeletal system morphogenesis" "kidney development" "renal system development" "regionalization" ...
## $ GeneRatio : chr "18/314" "20/314" "20/314" "21/314" ...
## $ BgRatio : chr "269/28928" "375/28928" "390/28928" "454/28928" ...
## $ RichFactor : num 0.0669 0.0533 0.0513 0.0463 0.1 ...
## $ FoldEnrichment: num 6.16 4.91 4.72 4.26 9.21 ...
## $ zScore : num 8.91 7.99 7.76 7.34 9.04 ...
## $ pvalue : num 1.10e-09 6.52e-09 1.26e-08 3.22e-08 3.40e-08 ...
## $ p.adjust : num 4.11e-06 1.22e-05 1.57e-05 2.26e-05 2.26e-05 ...
## $ qvalue : num 3.09e-06 9.17e-06 1.18e-05 1.70e-05 1.70e-05 ...
## $ geneID : chr "Pappa2/Frem1/Mmp14/Zfand5/Hoxb9/Hoxb3/Enpp1/Hoxc8/Hoxb7/Hoxc4/Hoxd11/Flvcr1/Hoxd4/Hoxb4/Zfp950/Vdr/Hoxc9/Hoxb5" "Gfra1/Bmp2/Bmper/Cdkn1c/Robo2/Cited1/Ass1/Col4a3/Irx1/Nrp1/Col4a4/Enpp1/Bcl2/Hoxb7/Pax8/Ptk7/Gdf11/Hoxd11/Irx2/Zfp950" "Gfra1/Bmp2/Bmper/Cdkn1c/Robo2/Cited1/Ass1/Col4a3/Irx1/Nrp1/Col4a4/Enpp1/Bcl2/Hoxb7/Pax8/Ptk7/Gdf11/Hoxd11/Irx2/Zfp950" "Bmp2/Robo2/Irx1/Hoxb9/Nrp1/Hoxb3/Hoxc8/Hoxb7/Pax8/Hoxc4/Gdf11/Hoxd11/Irx2/Hoxd4/Hoxb4/Hipk2/Arl13b/Ppp2r3a/Tasor/Hoxc9/Hoxb5" ...
## $ Count : int 18 20 20 21 11 15 12 15 18 17 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:217] "330428" "16513" "268379" "53315" "15220" "69824" "13645" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...331 enriched terms found
## 'data.frame': 331 obs. of 12 variables:
## $ ID : chr "GO:0031589" "GO:0015980" "GO:0031638" "GO:0010810" ...
## $ Description : chr "cell-substrate adhesion" "energy derivation by oxidation of organic compounds" "zymogen activation" "regulation of cell-substrate adhesion" ...
## $ GeneRatio : chr "19/211" "15/211" "8/211" "12/211" ...
## $ BgRatio : chr "377/28928" "380/28928" "82/28928" "235/28928" ...
## $ RichFactor : num 0.0504 0.0395 0.0976 0.0511 0.039 ...
## $ FoldEnrichment: num 6.91 5.41 13.38 7 5.35 ...
## $ zScore : num 9.9 7.42 9.62 7.92 6.85 ...
## $ pvalue : num 5.18e-11 1.49e-07 1.57e-07 1.78e-07 1.16e-06 ...
## $ p.adjust : num 1.61e-07 1.38e-04 1.38e-04 1.38e-04 7.20e-04 ...
## $ qvalue : num 1.22e-07 1.05e-04 1.05e-04 1.05e-04 5.48e-04 ...
## $ geneID : chr "Ntn4/Tacstd2/Fermt1/Jag1/Ccn1/Kank1/Rhod/Dbn1/Thbs1/Fzd4/Plau/L1cam/Lamb1/Itgb6/Epb41l5/Bcam/Tnfrsf12a/Itgb8/Gas6" "Mrap2/Ppargc1a/Gabarapl1/Idh2/Cycs/Idh3a/Got1/Eva1a/Aco2/Ndufs2/Pfkm/Cs/Trap1/Uqcrfs1/Ppif" "Prss12/Ggt1/Thbs1/Plau/Cyfip2/Eno1/Pgk1/Klk1b9" "Tacstd2/Fermt1/Jag1/Ccn1/Kank1/Rhod/Dbn1/Thbs1/Fzd4/Plau/L1cam/Epb41l5" ...
## $ Count : int 19 15 8 12 13 15 8 14 13 11 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:104] "14282" "13653" "12702" "11910" "14281" "16598" "16477" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...485 enriched terms found
## 'data.frame': 485 obs. of 12 variables:
## $ ID : chr "GO:0010563" "GO:0045936" "GO:0042326" "GO:0001933" ...
## $ Description : chr "negative regulation of phosphorus metabolic process" "negative regulation of phosphate metabolic process" "negative regulation of phosphorylation" "negative regulation of protein phosphorylation" ...
## $ GeneRatio : chr "18/98" "18/98" "17/98" "16/98" ...
## $ BgRatio : chr "388/28928" "388/28928" "337/28928" "316/28928" ...
## $ RichFactor : num 0.0464 0.0464 0.0504 0.0506 0.0405 ...
## $ FoldEnrichment: num 13.7 13.7 14.9 14.9 11.9 ...
## $ zScore : num 14.7 14.7 15 14.5 13.2 ...
## $ pvalue : num 1.03e-15 1.03e-15 1.74e-15 1.19e-14 6.40e-14 ...
## $ p.adjust : num 1.20e-12 1.20e-12 1.35e-12 6.90e-12 2.97e-11 ...
## $ qvalue : num 7.60e-13 7.60e-13 8.57e-13 4.38e-12 1.89e-11 ...
## $ geneID : chr "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a/Rgs2/Irs2/Dnaja1/Taf7/Spry1/Impact" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a/Rgs2/Dnaja1/Taf7/Spry1/Impact" ...
## $ Count : int 18 18 17 16 17 17 13 12 11 15 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:29] "15945" "15957" "23962" "100038882" "15959" "67775" "16145" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...170 enriched terms found
## 'data.frame': 170 obs. of 12 variables:
## $ ID : chr "GO:0051607" "GO:0009615" "GO:0045088" "GO:0045069" ...
## $ Description : chr "defense response to virus" "response to virus" "regulation of innate immune response" "regulation of viral genome replication" ...
## $ GeneRatio : chr "14/29" "14/29" "11/29" "7/29" ...
## $ BgRatio : chr "329/28928" "412/28928" "496/28928" "100/28928" ...
## $ RichFactor : num 0.0426 0.034 0.0222 0.07 0.0631 ...
## $ FoldEnrichment: num 42.4 33.9 22.1 69.8 62.9 ...
## $ zScore : num 24 21.3 15 21.8 20.7 ...
## $ pvalue : num 3.05e-20 7.24e-19 8.83e-13 6.99e-12 1.47e-11 ...
## $ p.adjust : num 2.25e-17 2.67e-16 2.17e-10 1.29e-09 2.17e-09 ...
## $ qvalue : num 1.30e-17 1.54e-16 1.25e-10 7.41e-10 1.25e-09 ...
## $ geneID : chr "Cxcl10/Ifit1/Oasl2/Isg15/Ifit3/Rtp4/Igtp/Gbp7/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Bst2" "Cxcl10/Ifit1/Oasl2/Isg15/Ifit3/Rtp4/Igtp/Gbp7/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Bst2" "Isg15/Igtp/Gbp7/Parp14/Irgm1/Stat2/Ifih1/Zc3hav1/Znfx1/Nfkbia/Grn" "Oasl2/Isg15/Gbp7/Ifih1/Zc3hav1/Znfx1/Bst2" ...
## $ Count : int 14 14 11 7 7 6 7 6 7 7 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
pathway_list <- list(
"type 1" = results_type1$pos_go_up@result,
"type 2" = results_type2$pos_go_up@result,
"type 3" = results_type3$pos_go_up@result,
"type 4" = results_type4$pos_go_up@result
)
wb <- createWorkbook()
for (sheet_name in names(pathway_list)) {
addWorksheet(wb, sheet_name)
writeData(wb, sheet_name, pathway_list[[sheet_name]])
}
# Save the workbook
saveWorkbook(wb, here("jk_code", "md_type_pathways_DEGS.xlsx"), overwrite = TRUE)# Subset markers by subtype
subtype1a_markers <- subset(subtype_markers, cluster == "type_1a")
subtype1b_markers <- subset(subtype_markers, cluster == "type_1b")
subtype1c_markers <- subset(subtype_markers, cluster == "type_1c")
subtype2a_markers <- subset(subtype_markers, cluster == "type_2a")
subtype2b_markers <- subset(subtype_markers, cluster == "type_2b")
subtype3a_markers <- subset(subtype_markers, cluster == "type_3a")
subtype3b_markers <- subset(subtype_markers, cluster == "type_3b")
# Run pathway analysis for each subtype (adjust thresholds as appropriate)
results_type1a <- pathway_analysis(subset(subtype1a_markers, avg_log2FC > 0.5), "type 1a")
results_type1b <- pathway_analysis(subset(subtype1b_markers, avg_log2FC > 0.6), "type 1b")
results_type1c <- pathway_analysis(subset(subtype1c_markers, avg_log2FC > 0), "type 1c")
results_type2a <- pathway_analysis(subset(subtype2a_markers, avg_log2FC > 1), "type 2a")
results_type2b <- pathway_analysis(subset(subtype2b_markers, avg_log2FC > 2), "type 2b")
results_type3a <- pathway_analysis(subset(subtype3a_markers, avg_log2FC > 0), "type 3a")
results_type3b <- pathway_analysis(subset(subtype3b_markers, avg_log2FC > 0), "type 3b")## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:194] "239435" "19283" "23850" "12960" "72309" "320685" "66815" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...47 enriched terms found
## 'data.frame': 47 obs. of 12 variables:
## $ ID : chr "GO:0002526" "GO:0006953" "GO:0141156" "GO:0009636" ...
## $ Description : chr "acute inflammatory response" "acute-phase response" "cAMP/PKA signal transduction" "response to toxic substance" ...
## $ GeneRatio : chr "9/187" "6/187" "4/187" "9/187" ...
## $ BgRatio : chr "122/28928" "51/28928" "25/28928" "234/28928" ...
## $ RichFactor : num 0.0738 0.1176 0.16 0.0385 0.1429 ...
## $ FoldEnrichment: num 11.41 18.2 24.75 5.95 22.1 ...
## $ zScore : num 9.3 9.92 9.58 6.13 9.01 ...
## $ pvalue : num 1.06e-07 9.52e-07 1.92e-05 2.32e-05 3.07e-05 ...
## $ p.adjust : num 0.000276 0.001242 0.010416 0.010416 0.010416 ...
## $ qvalue : num 0.000235 0.001056 0.008855 0.008855 0.008855 ...
## $ geneID : chr "Nupr1/C2cd4b/Lbp/Ptgs2/Hfe/Ass1/Ptges/Gstp1/Plscr1" "Lbp/Ptgs2/Hfe/Ass1/Ptges/Plscr1" "Calcr/Ramp3/Sfn/Prkar1b" "Lcn2/Nupr1/Ptgs2/Ass1/Ptges/Slc22a18/Pon2/Gstp1/Cnp" ...
## $ Count : int 9 6 4 9 4 4 11 4 4 3 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:216] "380785" "74480" "244682" "103948" "19218" "171531" "54156" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...62 enriched terms found
## 'data.frame': 62 obs. of 12 variables:
## $ ID : chr "GO:0099173" "GO:0031346" "GO:0060996" "GO:0051647" ...
## $ Description : chr "postsynapse organization" "positive regulation of cell projection organization" "dendritic spine development" "nucleus localization" ...
## $ GeneRatio : chr "13/203" "15/203" "8/203" "5/203" ...
## $ BgRatio : chr "314/28928" "471/28928" "138/28928" "44/28928" ...
## $ RichFactor : num 0.0414 0.0318 0.058 0.1136 0.0332 ...
## $ FoldEnrichment: num 5.9 4.54 8.26 16.19 4.74 ...
## $ zScore : num 7.34 6.51 7.19 8.48 5.75 ...
## $ pvalue : num 3.87e-07 1.36e-06 6.23e-06 1.41e-05 2.44e-05 ...
## $ p.adjust : num 0.00113 0.00199 0.00609 0.01032 0.01283 ...
## $ qvalue : num 0.000954 0.001674 0.005123 0.008683 0.010798 ...
## $ geneID : chr "Epha4/Flrt2/Myh10/Kif1a/Magi2/Ptprs/Mpp2/Abl2/Prmt3/Sh3gl2/Tanc2/Ephb2/Cadm1" "Epha4/Nin/Ntn1/Shtn1/Cnr1/Crocc/Ccp110/Magi2/Vldlr/Sirt1/Abl2/Ccdc88a/Scn1b/Ephb2/Ttbk2" "Epha4/Mapk6/Kif1a/Ptprs/Srgap2/Prmt3/Tanc2/Ephb2" "Ntn1/Myh10/Kif1a/Sirt1/Tacc2" ...
## $ Count : int 13 15 8 5 11 7 10 10 3 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:41] "12309" "243813" "73102" "74442" "333050" "243537" "117600" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...241 enriched terms found
## 'data.frame': 241 obs. of 12 variables:
## $ ID : chr "GO:0006816" "GO:0003015" "GO:0043271" "GO:1903522" ...
## $ Description : chr "calcium ion transport" "heart process" "negative regulation of monoatomic ion transport" "regulation of blood circulation" ...
## $ GeneRatio : chr "8/38" "6/38" "5/38" "6/38" ...
## $ BgRatio : chr "473/28928" "256/28928" "141/28928" "279/28928" ...
## $ RichFactor : num 0.0169 0.0234 0.0355 0.0215 0.0157 ...
## $ FoldEnrichment: num 12.9 17.8 27 16.4 11.9 ...
## $ zScore : num 9.44 9.82 11.22 9.36 8.44 ...
## $ pvalue : num 1.53e-07 9.86e-07 1.13e-06 1.62e-06 1.67e-06 ...
## $ p.adjust : num 0.000239 0.000517 0.000517 0.000517 0.000517 ...
## $ qvalue : num 0.00013 0.000281 0.000281 0.000281 0.000281 ...
## $ geneID : chr "Cacna1d/Nos1/Ramp3/Bcl2/Atp2b4/Ptgs2/Usp2/Camk2d" "Cacna1d/Nos1/Ramp3/Atp2b4/Camk2d/Avpr1a" "Nos1/Bcl2/Ptgs2/Usp2/Camk2d" "Itga4/Cacna1d/Nos1/Atp2b4/Ptgs2/Avpr1a" ...
## $ Count : int 8 6 5 6 7 6 4 6 3 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:44] "19142" "57764" "20315" "19419" "22242" "432530" "19649" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...97 enriched terms found
## 'data.frame': 97 obs. of 12 variables:
## $ ID : chr "GO:0050678" "GO:0001667" "GO:1905330" "GO:0016055" ...
## $ Description : chr "regulation of epithelial cell proliferation" "ameboidal-type cell migration" "regulation of morphogenesis of an epithelium" "Wnt signaling pathway" ...
## $ GeneRatio : chr "7/44" "6/44" "4/44" "7/44" ...
## $ BgRatio : chr "432/28928" "278/28928" "75/28928" "467/28928" ...
## $ RichFactor : num 0.0162 0.0216 0.0533 0.015 0.022 ...
## $ FoldEnrichment: num 10.65 14.19 35.06 9.85 14.48 ...
## $ zScore : num 7.89 8.62 11.53 7.53 7.96 ...
## $ pvalue : num 3.75e-06 3.88e-06 5.23e-06 6.25e-06 2.41e-05 ...
## $ p.adjust : num 0.00253 0.00253 0.00253 0.00253 0.00723 ...
## $ qvalue : num 0.00161 0.00161 0.00161 0.00161 0.00461 ...
## $ geneID : chr "Cxcl12/Egf/Ccnd1/Sulf2/Gas1/Tacstd2/Bmp4" "Cxcl12/Fermt1/Amotl2/Tacstd2/Bmp4/Kank1" "Ntn4/Egf/Tacstd2/Bmp4" "Egf/Ccnd1/Sulf2/Fermt1/Amotl2/Sostdc1/Kank1" ...
## $ Count : int 7 6 4 7 5 5 5 5 5 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:81] "330428" "16513" "15220" "16373" "53315" "268379" "26944" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...116 enriched terms found
## 'data.frame': 116 obs. of 12 variables:
## $ ID : chr "GO:1905330" "GO:0072006" "GO:0072080" "GO:0001822" ...
## $ Description : chr "regulation of morphogenesis of an epithelium" "nephron development" "nephron tubule development" "kidney development" ...
## $ GeneRatio : chr "6/79" "7/79" "6/79" "9/79" ...
## $ BgRatio : chr "75/28928" "180/28928" "113/28928" "375/28928" ...
## $ RichFactor : num 0.08 0.0389 0.0531 0.024 0.0504 ...
## $ FoldEnrichment: num 29.29 14.24 19.44 8.79 18.46 ...
## $ zScore : num 12.84 9.32 10.28 7.94 9.99 ...
## $ pvalue : num 5.93e-08 6.38e-07 6.85e-07 8.69e-07 9.29e-07 ...
## $ p.adjust : num 0.000116 0.000337 0.000337 0.000337 0.000337 ...
## $ qvalue : num 0.000083 0.000241 0.000241 0.000241 0.000241 ...
## $ geneID : chr "Tacstd2/Egf/Alox12/Bmp4/Sall1/Ntn4" "Irx3/Jag1/Tacstd2/Umod/Sulf2/Bmp4/Sall1" "Irx3/Jag1/Tacstd2/Umod/Bmp4/Sall1" "Irx3/Jag1/Tacstd2/Umod/Sulf2/Bmp4/Sall1/Lrrk2/Glis2" ...
## $ Count : int 6 7 6 9 6 5 9 6 9 3 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:37] "14281" "15511" "193740" "16476" "16477" "16007" "12702" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...243 enriched terms found
## 'data.frame': 243 obs. of 12 variables:
## $ ID : chr "GO:0061077" "GO:0051085" "GO:0051084" "GO:0006458" ...
## $ Description : chr "chaperone-mediated protein folding" "chaperone cofactor-dependent protein refolding" "'de novo' post-translational protein folding" "'de novo' protein folding" ...
## $ GeneRatio : chr "6/36" "5/36" "5/36" "5/36" ...
## $ BgRatio : chr "69/28928" "33/28928" "44/28928" "46/28928" ...
## $ RichFactor : num 0.087 0.152 0.114 0.109 0.167 ...
## $ FoldEnrichment: num 69.9 121.8 91.3 87.3 133.9 ...
## $ zScore : num 20.2 24.5 21.2 20.7 23 ...
## $ pvalue : num 2.71e-10 5.17e-10 2.34e-09 2.95e-09 2.11e-08 ...
## $ p.adjust : num 3.61e-07 3.61e-07 1.03e-06 1.03e-06 5.89e-06 ...
## $ qvalue : num 2.14e-07 2.14e-07 6.12e-07 6.12e-07 3.50e-06 ...
## $ geneID : chr "Hspa1b/Hspa1a/Hspb1/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" "Hspa1b/Hspa1a/Dnajb4/Dnajb1/Hspa8" ...
## $ Count : int 6 5 5 5 4 8 6 5 5 7 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
## $chart_up
##
## $pos_go_up
## #
## # over-representation test
## #
## #...@organism Mus musculus
## #...@ontology BP
## #...@keytype ENTREZID
## #...@gene chr [1:166] "13654" "13653" "11910" "14282" "12702" "16598" "14825" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...760 enriched terms found
## 'data.frame': 760 obs. of 12 variables:
## $ ID : chr "GO:0042326" "GO:0010563" "GO:0045936" "GO:0001933" ...
## $ Description : chr "negative regulation of phosphorylation" "negative regulation of phosphorus metabolic process" "negative regulation of phosphate metabolic process" "negative regulation of protein phosphorylation" ...
## $ GeneRatio : chr "20/157" "21/157" "21/157" "18/157" ...
## $ BgRatio : chr "337/28928" "388/28928" "388/28928" "316/28928" ...
## $ RichFactor : num 0.0593 0.0541 0.0541 0.057 0.0504 ...
## $ FoldEnrichment: num 10.94 9.97 9.97 10.5 9.29 ...
## $ zScore : num 13.6 13.1 13.1 12.5 11.6 ...
## $ pvalue : num 2.74e-15 3.30e-15 3.30e-15 1.47e-13 1.16e-12 ...
## $ p.adjust : num 3.19e-12 3.19e-12 3.19e-12 1.06e-10 6.71e-10 ...
## $ qvalue : num 2.12e-12 2.12e-12 2.12e-12 7.05e-11 4.45e-10 ...
## $ geneID : chr "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Irs2/Gadd45a/Rgs2/Spry1/Dnaja1/Cd"| __truncated__ "Socs3/Dusp1/Gadd45b/Jun/Irf1/Ppp1r15a/Gadd45g/Hspb1/Socs1/Cdkn1a/Errfi1/Trib1/Gadd45a/Rgs2/Spry1/Dnaja1/Taf7/Impact" ...
## $ Count : int 20 21 21 18 18 19 14 15 13 17 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
Assign your markers as a vector genes_to_plot <- c( “Fos”, “Socs3”, “Gadd45b”, “Wee1”, “Hspa1a”, “Sat1”, “Cxcl12”, “Itpr2”, “Bmp4”, “Casr”, “Grin2c”, “Irx3”, “Rap1gap”, “App”, “Wwc1”, “Syt5”, “Syn3”, “Cacna1d”, “Slc6a7”, “Robo2”, “Begain”, “Pappa1”, “Nos1”, “Ptgs2”, “Bmp2”, “Atp2a3”
## Warning: The following requested variables were not found: Pappa1
FeaturePlots
These are the genes plotted on figure C on the article
Each gene seems to be consistent with this macula dataset except Ccn1
MD 5 genes shown in this dataset
All of these genes are enriched in every cluster
## Warning: The following requested variables were not found: Pappa1
## Warning: The following requested variables were not found: Pappa1